# analysis_section_4_1.R
# This script replicates the analysis in Section 4.1 of the paper
#   Figure 2, Figure SI.10, and Figure SI.11
# Author: Yimeng Li
# Dependencies:
#   Data/VR_VoteCal_LA.Rdata
#   Data/Precinct Results/c037_p12_sov_data_by_p12_srprec.csv
#   Data/Precinct Results/c037_p14_sov_data_by_p14_srprec.csv
#   Data/Precinct Results/c037_p16_sov_data_by_p16_srprec.csv
#   Data/Precinct Results/c037_p18_sov_data_by_p18_srprec.csv
#   Data/Precinct Results/c037_p20_sov_data_by_p20_srprec.csv

# Load libraries:
library(dplyr)
library(ggplot2)
library(patchwork)
library(readr)
library(sandwich)
library(tidyr)

#*******************************************************************************
# Import and Prepare Data for Figure 2 and Table SI.10 (Precinct-Level Analysis)
#*******************************************************************************

# Import data:
precinct_results_2012Pri <- read_csv("./Replication Package/Data/Precinct Results/c037_p12_sov_data_by_p12_srprec.csv")
precinct_results_2014Pri <- read_csv("./Replication Package/Data/Precinct Results/c037_p14_sov_data_by_p14_srprec.csv")
precinct_results_2016Pri <- read_csv("./Replication Package/Data/Precinct Results/c037_p16_sov_data_by_p16_srprec.csv")
precinct_results_2018Pri <- read_csv("./Replication Package/Data/Precinct Results/c037_p18_sov_data_by_p18_srprec.csv")
precinct_results_2020Pri <- read_csv("./Replication Package/Data/Precinct Results/c037_p20_sov_data_by_p20_srprec.csv")

# Prepare data for analysis
precinct_results <-
  bind_rows(
    precinct_results_2012Pri %>% select(srprec, addist, cddist, sddist, TOTREG, TOTVOTE) %>% mutate(YEAR = "2012"),
    precinct_results_2014Pri %>% select(srprec, addist, cddist, sddist, TOTREG, TOTVOTE) %>% mutate(YEAR = "2014"),
    precinct_results_2016Pri %>% select(srprec, addist, cddist, sddist, TOTREG, TOTVOTE) %>% mutate(YEAR = "2016"),
    precinct_results_2018Pri %>% select(srprec, addist, cddist, sddist, TOTREG, TOTVOTE) %>% mutate(YEAR = "2018"),
    precinct_results_2020Pri %>% select(srprec, addist, cddist, sddist, TOTREG, TOTVOTE) %>% mutate(YEAR = "2020")
  ) %>%
  filter(srprec != "CNTYTOT") %>%
  mutate(
    TURNOUT = TOTVOTE/TOTREG,
    UVBM = case_when(
      cddist %in% c(38, 39, 47) ~ 1,
      cddist == 32 & sddist == 29 ~ 1,
      cddist == 40 & addist == 58 & sddist == 33 ~ 0,
      cddist == 40 & addist == 58 ~ 1,
      TRUE ~ 0
    )
  )

#*******************************************************************************
# Figure 2
#   Voter Turnout in Universal and Non-Universal VBM Districts in Los Angeles County
#*******************************************************************************

# Prepare data for Figure 2
precinct_results_summary <- precinct_results %>%
  group_by(YEAR, UVBM) %>%
  summarise(
    REG = sum(TOTREG),
    VOTE = sum(TOTVOTE),
    Turnout = sum(TOTVOTE)/sum(TOTREG)
  ) %>%
  mutate(District_Type = if_else(UVBM == 1, "Universal VBM Districts", "Non-Universal VBM Districts"),
         Year = as.numeric(YEAR),
         Year_Type = if_else(Year %in% c(2012, 2016, 2020), "Presidential", "Midterm"))

# Figure 2
Figure_LA_Historical_by_UVBM <- ggplot(data = precinct_results_summary,
                                       aes(x = Year, y = Turnout, colour = District_Type, 
                                           group = interaction(District_Type, Year_Type))) +
  geom_line(size = 1.2, alpha = 0.8) +
  geom_point(size = 3, shape = 16, alpha = 0.8) +
  geom_vline(xintercept = 2018.1, linetype = "dotted", size = 1, color = "red", alpha = 0.8) +
  annotate("text", x = 2018.1, y = 0.4, label = "Policy Implemented", vjust = -1, color = "red") +
  scale_y_continuous(limits = c(0.15, 0.45), breaks = seq(0.15, 0.45, by = 0.05),
                     labels = scales::percent_format(accuracy = 1)) +
  scale_colour_manual(values = c("blue", "orange")) +
  labs(x = "Year", y = "Turnout", colour = "District Type", title = "") +
  theme_bw() +
  theme(panel.border = element_rect(colour = "black", fill = NA, size = 1),
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        axis.line = element_line(colour = "black"),
        text = element_text(size = 14),
        axis.text.x = element_text(size = 14, angle = 45, hjust = 1),
        axis.text.y = element_text(size = 14),
        legend.position = "bottom", 
        legend.box = "vertical")

# Save Figure
ggsave("./Replication Package/Figure 2.pdf", Figure_LA_Historical_by_UVBM,
       width = 8, height = 4, units = "in", dpi = 600)

#*******************************************************************************
# Figure SI.10
#   Testing Parallel Trends Assumption
#*******************************************************************************

# Prepare data for Figure SI.10
precinct_results_12_16_20 <- precinct_results %>%
  filter(YEAR %in% c("2012", "2016", "2020"), TOTREG > 0) %>%
  mutate(
    YEAR_2012 = if_else(YEAR == 2012, 1, 0),
    YEAR_2020 = if_else(YEAR == 2020, 1, 0),
    Int_UVBM_YEAR_2012 = UVBM*YEAR_2012,
    Int_UVBM_YEAR_2020 = UVBM*YEAR_2020,
    dist = paste0(cddist, ", ", sddist, ", ", addist))

# Run model for Figure SI.10
test_trends <- lm(TURNOUT ~ UVBM + YEAR_2012 + YEAR_2020 + Int_UVBM_YEAR_2012 + Int_UVBM_YEAR_2020,
                  weights = TOTREG,
                  data = precinct_results_12_16_20)

# Obtain coefficient estimates and clustered standard errors 
test_trends_summary <- data.frame(
  coef = 100*coef(summary(test_trends))[, "Estimate"],
  se_clustered = 100*sqrt(diag(vcovCL(test_trends, cluster = ~ dist)))
)

# Calculate 90% and 95% confidence intervals
test_trends_figure <- data.frame(
  year = c(2012, 2016, 2020),
  coef = c(test_trends_summary$coef[5], 0, test_trends_summary$coef[6]),
  lower_90 = c(test_trends_summary$coef[5]+qnorm(.05)*test_trends_summary$se_clustered[5],
               NA_real_,
               test_trends_summary$coef[6]+qnorm(.05)*test_trends_summary$se_clustered[6]),
  upper_90 = c(test_trends_summary$coef[5]+qnorm(.95)*test_trends_summary$se_clustered[5],
               NA_real_,
               test_trends_summary$coef[6]+qnorm(.95)*test_trends_summary$se_clustered[6]),
  lower_95 = c(test_trends_summary$coef[5]+qnorm(.025)*test_trends_summary$se_clustered[5],
               NA_real_,
               test_trends_summary$coef[6]+qnorm(.025)*test_trends_summary$se_clustered[6]),
  upper_95 = c(test_trends_summary$coef[5]+qnorm(.975)*test_trends_summary$se_clustered[5],
               NA_real_,
               test_trends_summary$coef[6]+qnorm(.975)*test_trends_summary$se_clustered[6])
)

# Figure SI.10
Figure_Test_Trends <- ggplot(test_trends_figure, aes(x = year, y = coef)) +
  geom_point(color = "blue", size = 2) +
  geom_errorbar(aes(ymin = lower_90, ymax = upper_90), width = 0.2, size = 1.5) +
  geom_errorbar(aes(ymin = lower_95, ymax = upper_95), width = 0.2) +
  geom_hline(yintercept = 0, linetype = "dashed") +
  scale_x_continuous(limits = c(2012, 2020), breaks = seq(2012, 2020, by = 4)) +
  scale_y_continuous(limits = c(-1, 5), breaks = seq(-1, 5, by = 1),
                     labels = scales::percent_format(scale = 1, suffix = " pp")) +
  labs(title = "", x = "Year", y = "Difference in Turnout (r.t. 2016)") +
  theme_bw() +
  theme(panel.border = element_rect(colour = "black", fill = NA, size = 1),
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        axis.line = element_line(colour = "black"),
        text = element_text(size = 14),
        axis.text.x = element_text(size = 14, angle = 45, hjust = 1),
        axis.text.y = element_text(size = 14),
        legend.position = "bottom",
        legend.box = "vertical")

# Save Figure
ggsave("./Replication Package/Figure SI.10.pdf", Figure_Test_Trends,
       width = 8, height = 4, units = "in", dpi = 600)

#*******************************************************************************
# Import and Prepare Data for Table SI.11 (Individual-Level Analysis)
#*******************************************************************************

# Load data:
load("./Replication Package/Data/VR_VoteCal_LA.Rdata")

# Prepare data for Table SI.11
VR_VoteCal_LA_RegBefore2012Pri_long <- VR_VoteCal_LA %>%
  filter(RegisteredBefore2012Pri == 1, !is.na(Dist)) %>%
  pivot_longer(cols = c("Turnout.2020", "Turnout.2016", "Turnout.2012"),
               names_to = c(".value", "Year"),
               names_sep = "[.]") %>%
  mutate(Year_2012 = if_else(Year == "2012", 1, 0),
         Year_2020 = if_else(Year == "2020", 1, 0),
         Int_UVBM_Year_2012 = UVBM*Year_2012,
         Int_UVBM_Year_2020 = UVBM*Year_2020)

#*******************************************************************************
# Figure SI.11
#   Testing Parallel Trends Assumption by Permanent Absentee Status
#*******************************************************************************

# Run model for Figure SI.11 Top Panel
test_trends_NPAV <- lm(Turnout ~ UVBM + Year_2012 + Year_2020 + Int_UVBM_Year_2012 + Int_UVBM_Year_2020,
                       data = VR_VoteCal_LA_RegBefore2012Pri_long %>% filter(NPAV_since_2012Pri == 1))

# Obtain coefficient estimates and clustered standard errors
test_trends_NPAV_summary <- data.frame(
  coef = 100*coef(summary(test_trends_NPAV))[, "Estimate"],
  se_clustered = 100*sqrt(diag(vcovCL(test_trends_NPAV, cluster = ~ Dist)))
)

# calculate 90% and 95% confidence intervals
test_trends_NPAV_figure <- data.frame(
  year = c(2012, 2016, 2020),
  coef = c(test_trends_NPAV_summary$coef[5], 0, test_trends_NPAV_summary$coef[6]),
  lower_90 = c(test_trends_NPAV_summary$coef[5]+qnorm(.05)*test_trends_NPAV_summary$se_clustered[5],
               NA_real_,
               test_trends_NPAV_summary$coef[6]+qnorm(.05)*test_trends_NPAV_summary$se_clustered[6]),
  upper_90 = c(test_trends_NPAV_summary$coef[5]+qnorm(.95)*test_trends_NPAV_summary$se_clustered[5],
               NA_real_,
               test_trends_NPAV_summary$coef[6]+qnorm(.95)*test_trends_NPAV_summary$se_clustered[6]),
  lower_95 = c(test_trends_NPAV_summary$coef[5]+qnorm(.025)*test_trends_NPAV_summary$se_clustered[5],
               NA_real_,
               test_trends_NPAV_summary$coef[6]+qnorm(.025)*test_trends_NPAV_summary$se_clustered[6]),
  upper_95 = c(test_trends_NPAV_summary$coef[5]+qnorm(.975)*test_trends_NPAV_summary$se_clustered[5],
               NA_real_,
               test_trends_NPAV_summary$coef[6]+qnorm(.975)*test_trends_NPAV_summary$se_clustered[6])
)

# Figure SI.11 (Top Panel)
Figure_Test_Trends_NPAV <- ggplot(test_trends_NPAV_figure, aes(x = year, y = coef)) +
  geom_point(color = "blue", size = 2) +
  geom_errorbar(aes(ymin = lower_90, ymax = upper_90), width = 0.2, size = 1.5) +
  geom_errorbar(aes(ymin = lower_95, ymax = upper_95), width = 0.2) +
  geom_hline(yintercept = 0, linetype = "dashed") +
  scale_x_continuous(limits = c(2012, 2020), breaks = seq(2012, 2020, by = 4)) +
  scale_y_continuous(limits = c(-2.5, 5.25), breaks = seq(-2, 5, by = 1),
                     labels = scales::percent_format(scale = 1, suffix = " pp")) +
  labs(title = "Non-Permanent Vote-by-Mail Voters",
       x = "Year",
       y = "Difference in Turnout (r.t. 2016)") +
  theme_bw() +
  theme(panel.border = element_rect(colour = "black", fill = NA, size = 1),
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        axis.line = element_line(colour = "black"),
        text = element_text(size = 14),
        axis.text.x = element_text(size = 14, angle = 45, hjust = 1),
        axis.text.y = element_text(size = 14),
        legend.position = "bottom",
        legend.box = "vertical")

# Run model for Figure SI.11 Bottom Panel
test_trends_PAV <- lm(Turnout ~ UVBM + Year_2012 + Year_2020 + Int_UVBM_Year_2012 + Int_UVBM_Year_2020,
                      data = VR_VoteCal_LA_RegBefore2012Pri_long %>% filter(PAV_since_2012Pri == 1))

# Obtain coefficient estimates and clustered standard errors
test_trends_PAV_summary <- data.frame(
  coef = 100*coef(summary(test_trends_PAV))[, "Estimate"],
  se_clustered = 100*sqrt(diag(vcovCL(test_trends_PAV, cluster = ~ Dist)))
)

# calculate 90% and 95% confidence intervals
test_trends_PAV_figure <- data.frame(
  year = c(2012, 2016, 2020),
  coef = c(test_trends_PAV_summary$coef[5], 0, test_trends_PAV_summary$coef[6]),
  lower_90 = c(test_trends_PAV_summary$coef[5]+qnorm(.05)*test_trends_PAV_summary$se_clustered[5],
               NA_real_,
               test_trends_PAV_summary$coef[6]+qnorm(.05)*test_trends_PAV_summary$se_clustered[6]),
  upper_90 = c(test_trends_PAV_summary$coef[5]+qnorm(.95)*test_trends_PAV_summary$se_clustered[5],
               NA_real_,
               test_trends_PAV_summary$coef[6]+qnorm(.95)*test_trends_PAV_summary$se_clustered[6]),
  lower_95 = c(test_trends_PAV_summary$coef[5]+qnorm(.025)*test_trends_PAV_summary$se_clustered[5],
               NA_real_,
               test_trends_PAV_summary$coef[6]+qnorm(.025)*test_trends_PAV_summary$se_clustered[6]),
  upper_95 = c(test_trends_PAV_summary$coef[5]+qnorm(.975)*test_trends_PAV_summary$se_clustered[5],
               NA_real_,
               test_trends_PAV_summary$coef[6]+qnorm(.975)*test_trends_PAV_summary$se_clustered[6])
)

# Figure SI.11 (Bottom Panel)
Figure_Test_Trends_PAV <- ggplot(test_trends_PAV_figure, aes(x = year, y = coef)) +
  geom_point(color = "blue", size = 2) +
  geom_errorbar(aes(ymin = lower_90, ymax = upper_90), width = 0.2, size = 1.5) +
  geom_errorbar(aes(ymin = lower_95, ymax = upper_95), width = 0.2) +
  geom_hline(yintercept = 0, linetype = "dashed") +
  scale_x_continuous(limits = c(2012, 2020), breaks = seq(2012, 2020, by = 4)) +
  scale_y_continuous(limits = c(-2.5, 5.25), breaks = seq(-2, 5, by = 1),
                     labels = scales::percent_format(scale = 1, suffix = " pp")) +
  labs(title = "Permanent Vote-by-Mail Voters",
       x = "Year",
       y = "Difference in Turnout (r.t. 2016)") +
  theme_bw() +
  theme(panel.border = element_rect(colour = "black", fill = NA, size = 1),
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        axis.line = element_line(colour = "black"),
        text = element_text(size = 14),
        axis.text.x = element_text(size = 14, angle = 45, hjust = 1),
        axis.text.y = element_text(size = 14),
        legend.position = "bottom",
        legend.box = "vertical")

# Full Figure
Figure_Test_Trends_combined <- Figure_Test_Trends_NPAV / Figure_Test_Trends_PAV

# Save Figure
ggsave("./Replication Package/Figure SI.11.pdf", Figure_Test_Trends_combined,
       width = 8, height = 8, units = "in", dpi = 600)
